* Point Python at the project folder so that the relative Replication output
* paths written by the Python blocks in this file resolve there.
python: import os; os.chdir(r'D:\OneDrive\Documents\Work\Library\Connectivity\Hjort & Poulsen')

* Google Maps API key, stored in the global APIkey that the Python blocks reference.
* It is read from the GOOGLE_MAPS_API_KEY environment variable instead of being
* written into this file: anyone holding the key can run up charges on the
* account that owns it (using this key can cost Open Philanthropy).
global APIkey : environment GOOGLE_MAPS_API_KEY
if "$APIkey" == "" {
    display as error "set the GOOGLE_MAPS_API_KEY environment variable before running this file"
    exit 198
}

* re-geocode decipherable Akamai place names through Google Maps

* Load the Akamai observations with ip_count above 10 for the twelve listed
* countries, then reduce them to one row per (city, country) pair.
* The country test is written as two inlist() calls joined by |; presumably this
* is because inlist() limits how many string arguments it accepts.
* The inlist() call inside the inline comment is an alternative, shorter country
* list that is currently disabled.
* collapse is used only to de-duplicate: the mean of year it produces is
* dropped on the next line.
use "D:\OneDrive\Documents\Work\Library\Connectivity\Hjort and Poulsen 2019\data\akamai" ///
  if (inlist(country,"KE","MZ","ZA","TZ","BJ","GH") | inlist(country,"MG","NG","SN","TG","CD","NA")) /*inlist(country,"BJ","CD","KE","MG","NG","ZA","TZ")*/ & ip_count>10, clear
collapse year, by(city country)
drop year

python
from sfi import Data; import requests; import pandas as pd

GEOCODE_URL = 'https://maps.googleapis.com/maps/api/geocode/json'
NOT_FOUND = -1000  # stored for both coordinates when no usable match comes back
# result types accepted as a place-level match
PLACE_TYPES = ('locality', 'sublocality', 'neighborhood')
# run-together Akamai city codes and the spelled-out names sent to the geocoder
SPACED_NAMES = {
    'PAARDENEILAND': 'Paarden Eiland',
    'DARESSALAAM': 'Dar es Salaam',
    'ATHIRIVER': 'Athi River',
    'PENINSULA': 'Cape Peninsula',
    'HALFWAYHOUSE': 'Halfway House Estate',
    'BEACONBAY': 'Beacon Bay',
    'BEDFORDVIEW': 'Bedford View',
    'CAPETOWN': 'Cape Town',
    'CAPITALPARK': 'Capital Park',
    'GREENPOINT': 'Green Point',
    'PORTELIZABETH': 'Port Elizabeth',
    'PORTHARCOURT': 'Port Harcourt',
    'RICHARDSBAY': 'Richards Bay',
    'TYGERBERGHILLS': 'TYGERBERG HILLS',
}
# manual codings from Google, as (long, lat); no free-text request is sent for these
MANUAL_LONGLAT = {
    'MEDUNSA': (28.0160807, -25.6191508),
    'ROGGEBAAI': (18.4268107, -33.9172161),
}


def query_name(code, iso2):
    """Return the name to send to the geocoder for an Akamai city code."""
    if code == 'LONDON' and iso2 == 'ZA':
        return 'East London'
    # codes without a spelled-out form are sent in lower case
    return SPACED_NAMES.get(code, code.lower())


def top_hit_longlat(params):
    """Run one geocoding request and return (long, lat) of its first result.

    The sentinel pair is returned when there are no results or when the first
    result is not typed as a locality, sublocality or neighborhood.
    """
    hits = requests.get(GEOCODE_URL, params=params).json()['results']
    if hits and any(kind in hits[0]['types'] for kind in PLACE_TYPES):
        spot = hits[0]['geometry']['location']
        return spot['lng'], spot['lat']
    return NOT_FOUND, NOT_FOUND


cities = Data.get('city')
countries = Data.get('country')
lat1, long1, lat2, long2 = [], [], [], []
# both parameter sets carry the key held in the Stata global APIkey
component_params = {'key' : '$APIkey'}
address_params = {'key' : '$APIkey'}
for code, iso2 in zip(cities, countries):
    name = query_name(code, iso2)

    # try components & region specification (https://developers.google.com/maps/documentation/geocoding/requests-geocoding#component-filtering)
    # NOTE(review): that page lists country, not ISO2, as the country filter; confirm the ISO2 component has the intended effect.
    component_params['region'] = iso2.lower()
    component_params['components'] = 'locality:' + name + '|ISO2:' + iso2
    lng, lat = top_hit_longlat(component_params)
    lat1.append(lat)
    long1.append(lng)
    print([code, iso2, lat, lng])

    # try free text address query, unless the place was coded by hand
    if code in MANUAL_LONGLAT:
        lng, lat = MANUAL_LONGLAT[code]
    else:
        address_params['address'] = name + ', ' + iso2
        lng, lat = top_hit_longlat(address_params)
    lat2.append(lat)
    long2.append(lng)
    print([code, iso2, lat, lng])

# one output row per (city, country), with the coordinates from both attempts
column_names = ['city', 'ISO2', 'long1', 'lat1', 'long2', 'lat2']
df = pd.DataFrame(zip(cities, countries, long1, lat1, long2, lat2)).rename(columns=dict(enumerate(column_names)))
df.to_csv('Replication\\Akamai Google geocoding.csv', index=False, mode='w')
end


* re-geocode Akamai place names through Nominatim
python
from sfi import Data; import requests, time; import pandas as pd

NOMINATIM_URL = 'https://nominatim.openstreetmap.org/search'
# Nominatim's usage policy asks every client to identify itself and does not
# accept the stock User-Agent of an HTTP library, so a descriptive one is sent.
# TODO: append a contact address for whoever runs this.
NOMINATIM_HEADERS = {'User-Agent': 'HP19-replication-geocoding/1.0'}
NOMINATIM_TIMEOUT = 60  # seconds; without it a stalled connection would hang the run
# run-together Akamai city codes and the spelled-out names sent to the geocoder
AKAMAI_SPACED_NAMES = {
    'PAARDENEILAND': 'Paarden Eiland',
    'DARESSALAAM': 'Dar es Salaam',
    'ATHIRIVER': 'Athi River',
    'PENINSULA': 'Cape Peninsula',
    'HALFWAYHOUSE': 'Halfway House Estate',
    'BEACONBAY': 'Beacon Bay',
    'BEDFORDVIEW': 'Bedford View',
    'CAPETOWN': 'Cape Town',
    'CAPITALPARK': 'Capital Park',
    'GREENPOINT': 'Green Point',
    'PORTELIZABETH': 'Port Elizabeth',
    'PORTHARCOURT': 'Port Harcourt',
    'RICHARDSBAY': 'Richards Bay',
    'TYGERBERGHILLS': 'TYGERBERG HILLS',
}
NOMINATIM_COLUMNS = ['city', 'ISO2', 'long', 'lat', 'osm_type', 'class', 'display_name', 'type', 'importance', 'place_id', 'keep']

places = Data.get('city country')
payload = {'format':'json'}
results = []
for place in places:
    if place[0] == 'LONDON' and place[1] == 'ZA':
        cityname = 'East London'
    else:
        # codes without a spelled-out form are sent in lower case
        cityname = AKAMAI_SPACED_NAMES.get(place[0], place[0].lower())
    payload['q'], payload['countrycodes'] = cityname, place[1]
    response = requests.get(NOMINATIM_URL, params=payload, headers=NOMINATIM_HEADERS, timeout=NOMINATIM_TIMEOUT)
    response.raise_for_status()  # fail with the HTTP status rather than a JSON parsing error
    # keep only matches classed as a place or a boundary
    matches = [r for r in response.json() if r['class'] in ['place', 'boundary']]
    if matches:
        _df = pd.DataFrame(
            [(place[0], place[1], r['lon'], r['lat'], r['osm_type'], r['class'], r['display_name'], r['type'], r['importance'], r['place_id'], 0) for r in matches],
            columns=NOMINATIM_COLUMNS)
        # flag the first match when it is the only one or strictly outranks the second on importance
        _df.at[0,'keep'] = 1 if _df.shape[0]==1 else int(_df.at[0,'importance'] > _df.at[1,'importance'])
        results.append(_df)
    print([place[0], place[1]])
    time.sleep(1)  # Nominatim-required delay

if results:
    df = pd.concat(results)
    df.to_csv('Replication\\Akamai Nominatim geocoding.csv', index=False, mode='w')
else:
    # pd.concat raises on an empty list, so report instead of crashing
    print('No Nominatim matches were found; no file was written.')
end


* determine countries of HP19's list of "large" Akamai cities

* Type the city names in as a one-variable dataset (city, str50); the Python
* block that follows reads this variable and looks up a country for each name.
* Names are upper-case codes with no spaces, one per line.
* Both WAVERLEY and WAVERLY appear in the list.
clear
input str50 city
ABIDJAN
ABOMEY
ABUJA
ACCRA
ADISABEBA
ANTANANARIVO
ARUSHA
BATA
BEIRA
BLANTYRE
BLOEMFONTEIN
BO
BOBODIOULASSO
BOSASO
BOUAKE
BRAZZAVILLE
BUJUMBURA
BULAWAYO
CAMAYENNE
CAPETOWN
CASABLANCA
CLAREMONT
CONAKRY
COTONOU
CUREPIPE
DAKAR
DARESSALAAM
DJIBOUTI
DJOUGOU
DOUALA
DURBAN
FES
FRANCEVILLE
FRANCISTOWN
FREETOWN
GABORONE
GREENPOINT
HARARE
HARGEISA
HELDERBERG
HUAMBO
IBADAN
JOHANNESBURG
KAMPALA
KANO
KHARTOUM
KIGALI
KINSHASA
KISUMU
KITWE
KUMASI
KWAZULU
LAGOS
LIBREVILLE
LILONGWE
LOBITO
LOME
LONDON
LUANDA
LUBUMBASHI
LUSAKA
LYNNWOOD
MALABO
MAPUTO
MARSHALLTOWN
MBUJIMAYI
MEDUNSA
MIDRAND
MODDERFONTEIN
MOGADISHU
MOMBASA
MONROVIA
MORONI
MPUMALANGA
MWANZA
NAIROBI
NDOLA
NEVES
NEWLANDS
NOUADHIBOU
NOUAKCHOTT
NZEREKORE
OUAGADOUGOU
PARKVIEW
POINTENOIRE
PORTGENTIL
PORTLOUIS
PORTSUDAN
PRAIA
PRETORIA
RABAT
RIVONIA
ROSSLYN
SAOTOME
SERREKUNDA
SILVERTON
TAMALE
VACOAS
WALVISBAY
WAVERLEY
WAVERLY
WINDHOEK
WYNBERG
YAOUNDE
end

python
from sfi import Data; import requests; import pandas as pd

# run-together Akamai city codes and the spelled-out names sent to the geocoder
BIG_CITY_SPACED_NAMES = {
    'PAARDENEILAND': 'Paarden Eiland',
    'DARESSALAAM': 'Dar es Salaam',
    'ATHIRIVER': 'Athi River',
    'PENINSULA': 'Cape Peninsula',
    'HALFWAYHOUSE': 'Halfway House Estate',
    'BEACONBAY': 'Beacon Bay',
    'BEDFORDVIEW': 'Bedford View',
    'CAPETOWN': 'Cape Town',
    'CAPITALPARK': 'Capital Park',
    'GREENPOINT': 'Green Point',
    'PORTELIZABETH': 'Port Elizabeth',
    'PORTHARCOURT': 'Port Harcourt',
    'RICHARDSBAY': 'Richards Bay',
    'TYGERBERGHILLS': 'TYGERBERG HILLS',
}

cities = Data.get('city')
ISO2 = []  # one country code per city; empty string when none was found
payload2 = {'key' : '$APIkey'}
for city in cities:
    if city == 'LONDON':
        # No country is known here (finding it is the purpose of this block) and
        # every query is scoped to Africa, so LONDON is read as East London.
        # The previous test consulted place[1], a variable this block never
        # sets: it was either left over from an earlier block or undefined.
        cityname = 'East London'
    else:
        # codes without a spelled-out form are sent in lower case
        cityname = BIG_CITY_SPACED_NAMES.get(city, city.lower())

    payload2['address'] = cityname + ', Africa'
    country = ''
    result = requests.get('https://maps.googleapis.com/maps/api/geocode/json', params=payload2).json()['results']
    if result:
        # short_name of the address component typed as country, taken from the first result
        _country = [r['short_name'] for r in result[0]['address_components'] if 'country' in r['types']]
        if _country:
            country = _country[0]
    ISO2.append(country)
    print([city, country])

df = pd.DataFrame(zip(cities, ISO2))
df.rename(columns = {0:'city', 1:'ISO2'}, inplace=True)
df.to_csv('Replication\\Akamai big cities countries.csv', index=False, mode='w')
end


* re-geocode Afrobarometer place names through Google Maps

* Google Maps API key, stored in the global APIkey that the Python block references.
* It is read from the GOOGLE_MAPS_API_KEY environment variable instead of being
* written into this file: anyone holding the key can run up charges on the
* account that owns it (using this key can cost Open Philanthropy).
global APIkey : environment GOOGLE_MAPS_API_KEY
if "$APIkey" == "" {
    display as error "set the GOOGLE_MAPS_API_KEY environment variable before running this file"
    exit 198
}

* Load the Afrobarometer place list over ODBC. The Python block that follows
* reads the variables iso2, country_name and place from the result.
set odbcdriver unicode // takes effect on restart
odbc load, clear dsn("Hjort & Poulsen 2019") exec("select * from Afrobarometer.places")

python
from sfi import Data; import requests, math, time; import pandas as pd

AFRO_GEOCODE_URL = 'https://maps.googleapis.com/maps/api/geocode/json'
AFRO_MISSING = -1000  # stored for latitude, longitude and area when a query has no result


def afro_top_hit(params):
    """Run one geocoding request and return (lat, long, area) of its first result.

    area is the viewport's latitude span times its longitude span times the
    cosine of the result's latitude, i.e. a latitude-adjusted size in squared
    degrees. All three values are the sentinel when the query has no result.
    """
    hits = requests.get(AFRO_GEOCODE_URL, params=params).json()['results']
    if not hits:
        return AFRO_MISSING, AFRO_MISSING, AFRO_MISSING
    geometry = hits[0]['geometry']
    lat = geometry['location']['lat']
    lng = geometry['location']['lng']
    northeast = geometry['viewport']['northeast']
    southwest = geometry['viewport']['southwest']
    area = (northeast['lat'] - southwest['lat']) * (northeast['lng'] - southwest['lng']) * math.cos(math.radians(lat))
    return lat, lng, area


iso2s = Data.get('iso2')
countrynames = Data.get('country_name')
places = Data.get('place')
lat1 = []; long1 = []; lat2 = []; long2 = []; area1 = []; area2 = []
payload1 = {'key' : '$APIkey'}
payload2 = {'key' : '$APIkey'}
for place in zip(places, iso2s, countrynames):
    # try components & region specification
    # NOTE(review): Google's component-filtering docs list country, not ISO2, as the country filter; confirm the ISO2 component has the intended effect.
    payload1['components'] = 'locality:' + place[0] +'|ISO2:' + place[1]
    thislat, thislong, thisarea = afro_top_hit(payload1)
    lat1.append(thislat)
    long1.append(thislong)
    area1.append(thisarea)
    print([place[0], place[1], thislat, thislong, thisarea])

    # try free text address query
    # Each query now yields its own area. Previously the area of the components
    # query was appended to area2 as well, and a query without results reused
    # the area of whichever earlier query last had one.
    payload2['address'] = place[0] + ', ' + place[2]
    thislat, thislong, thisarea = afro_top_hit(payload2)
    lat2.append(thislat)
    long2.append(thislong)
    area2.append(thisarea)
    print([place[0], place[1], thislat, thislong, thisarea])

df = pd.DataFrame(zip(places, iso2s, long1, lat1, area1, long2, lat2, area2))
df.rename(columns = {0:'place', 1:'iso2', 2:'long1', 3:'lat1', 4:'area1', 5:'long2', 6:'lat2', 7:'area2'}, inplace=True)
df.to_csv('Replication\\Afrobarometer Google geocoding.csv', index=False, mode='w')
end

// * re-geocode Afrobarometer place names through Nominatim...returns no results for many localities...
// python
// payload = {'format':'json'}
// iso2s = Data.get('iso2')
// places = Data.get('place')
// # results = []
// for place in zip(places, iso2s):
//     payload['q'] = place[0].lower()
//     payload['countrycodes'] = place[1]
//     try:
//       result = requests.get('https://nominatim.openstreetmap.org/search', params=payload).json()
//     except:
//       result = requests.get('https://nominatim.openstreetmap.org/search', params=payload).json()
//     if len(result):
//         _df = pd.DataFrame([(place[0], place[1], r['lon'], r['lat'], (float(r['boundingbox'][1]) - float(r['boundingbox'][0])) * (float(r['boundingbox'][3]) - float(r['boundingbox'][2])) * math.cos(math.radians(float(r['lat']))), r['osm_type'], r['class'], r['display_name'], r['type'], r['importance'], False) for r in result if r['class'] in ['place','boundary']])
//         if len(_df):
//           _df.rename(columns = {0:'Place', 1:'iso2', 2:'long', 3:'lat', 4:'area', 5:'osm_type', 6:'class', 7:'display_name', 8:'type', 9:'importance', 10:'keep'}, inplace=True)
//           _df.at[0,'keep'] = True if _df.shape[0]==1 else _df.at[0,'importance'] > _df.at[1,'importance']
//           results.append(_df)
//     print(place)
//     time.sleep(1)  # Nominatim-required delay
// df = pd.concat(results)
// df.to_csv('Replication\\Afrobarometer Nominatim geocoding.csv', index=False, mode='w')
// end



* re-geocode LMMIS place names through Google Maps

* Google Maps API key, stored in the global APIkey that the Python block references.
* It is read from the GOOGLE_MAPS_API_KEY environment variable instead of being
* written into this file: anyone holding the key can run up charges on the
* account that owns it (using this key can cost Open Philanthropy).
global APIkey : environment GOOGLE_MAPS_API_KEY
if "$APIkey" == "" {
    display as error "set the GOOGLE_MAPS_API_KEY environment variable before running this file"
    exit 198
}

* Turn the region, zone and woreda codes into their value-label text, trim the
* TOWN marker from woreda names, and keep one row per distinct combination of
* the variables matched by *name. The collapse serves only to de-duplicate:
* the mean of year it produces is dropped on the next line.
* The first replace removes the last 6 characters (the length of /TOWN/) and the
* second the last 7 (the length of /TOWN/-).
* NOTE(review): neither pattern is anchored to the end of the name, and the
* 6-character strip runs first, so a name ending in /TOWN/- would lose only its
* last 6 characters and then no longer match the second pattern. Confirm
* against the labels actually present in the data.
use "D:\OneDrive\Documents\Work\Library\Connectivity\Hjort and Poulsen 2019\data\lmmis", clear
decode Regcode, gen(Regname)
decode Zoncode, gen(Zonname)
decode Worcode, gen(Worname)
replace Worname = substr(Worname, 1, strlen(Worname)-6) if regexm(Worname,".+/TOWN/")
replace Worname = substr(Worname, 1, strlen(Worname)-7) if regexm(Worname,".+/TOWN/-")
collapse year, by(*name)
drop year

python
# pandas is imported here as well: the block uses pd below, and previously it
# only worked if an earlier Python block in the same session had imported it.
from sfi import Data; import requests; import pandas as pd

LMMIS_GEOCODE_URL = 'https://maps.googleapis.com/maps/api/geocode/json'
LMMIS_MISSING = -1000  # stored for both coordinates when no usable match comes back
# result types accepted as a place-level match
LMMIS_PLACE_TYPES = ('locality', 'sublocality', 'neighborhood')


def lmmis_top_hit(params):
    """Run one geocoding request and return (lat, long) of its first result.

    The sentinel pair is returned when there are no results or when the first
    result is not typed as a locality, sublocality or neighborhood.
    """
    hits = requests.get(LMMIS_GEOCODE_URL, params=params).json()['results']
    if hits and any(kind in hits[0]['types'] for kind in LMMIS_PLACE_TYPES):
        spot = hits[0]['geometry']['location']
        return spot['lat'], spot['lng']
    return LMMIS_MISSING, LMMIS_MISSING


regions = Data.get('Regname')
zones = Data.get('Zonname')
woredas = Data.get('Worname')
lat1 = []; long1 = []; lat2 = []; long2 = []
payload1 = {'key' : '$APIkey', 'region' : 'ET'}
payload2 = {'key' : '$APIkey'}
for place in zip(regions, zones, woredas):
    # only the woreda name is sent; region and zone are carried along for the output
    locality = place[2].lower()

    # try components & region specification
    payload1['components'] = 'locality:' + locality
    thislat, thislong = lmmis_top_hit(payload1)
    lat1.append(thislat)
    long1.append(thislong)
    print([place[0], place[1], place[2], thislat, thislong])

    # try free text address query
    payload2['address'] = locality + ', ET'
    thislat, thislong = lmmis_top_hit(payload2)
    lat2.append(thislat)
    long2.append(thislong)
    print([place[0], place[1], place[2], thislat, thislong])

df = pd.DataFrame(zip(regions, zones, woredas, lat1, long1, lat2, long2))
# the mixed-case coordinate column names are kept exactly as before, since
# anything reading this CSV may depend on them
df.rename(columns = {0:'Regname', 1:'Zonname', 2:'Worname', 3:'lAT1', 4:'lONG1', 5:'lAT2', 6:'long2'}, inplace=True)
df.to_csv('Replication\\LMMIS Google geocoding.csv', index=False, mode='w')
end


* re-geocode LMMIS place names through Nominatim

* Reload the LMMIS data and rebuild the list of distinct places: decode the
* region, zone and woreda codes into their value-label text, trim the TOWN
* marker from woreda names, and keep one row per distinct combination of the
* variables matched by *name. The collapse serves only to de-duplicate: the
* mean of year it produces is dropped on the next line.
* The first replace removes the last 6 characters (the length of /TOWN/) and the
* second the last 7 (the length of /TOWN/-).
* NOTE(review): neither pattern is anchored to the end of the name, and the
* 6-character strip runs first, so a name ending in /TOWN/- would lose only its
* last 6 characters and then no longer match the second pattern. Confirm
* against the labels actually present in the data.
use "D:\OneDrive\Documents\Work\Library\Connectivity\Hjort and Poulsen 2019\data\lmmis", clear
decode Regcode, gen(Regname)
decode Zoncode, gen(Zonname)
decode Worcode, gen(Worname)
replace Worname = substr(Worname, 1, strlen(Worname)-6) if regexm(Worname,".+/TOWN/")
replace Worname = substr(Worname, 1, strlen(Worname)-7) if regexm(Worname,".+/TOWN/-")
collapse year, by(*name)
drop year

python
from sfi import Data; import requests, time; import pandas as pd

LMMIS_NOMINATIM_URL = 'https://nominatim.openstreetmap.org/search'
# Nominatim's usage policy asks every client to identify itself and does not
# accept the stock User-Agent of an HTTP library, so a descriptive one is sent.
# TODO: append a contact address for whoever runs this.
LMMIS_NOMINATIM_HEADERS = {'User-Agent': 'HP19-replication-geocoding/1.0'}
LMMIS_NOMINATIM_TIMEOUT = 60  # seconds; without it a stalled connection would hang the run
LMMIS_NOMINATIM_COLUMNS = ['Regname', 'Zonname', 'Worname', 'long', 'lat', 'osm_type', 'class', 'display_name', 'type', 'importance', 'keep']

places = Data.get('Regname Zonname Worname')
payload = {'format':'json', 'countrycodes':'ET'}
results = []
for place in places:
    # only the woreda name is sent; region and zone are carried along for the output
    payload['q'] = place[2].lower()
    response = requests.get(LMMIS_NOMINATIM_URL, params=payload, headers=LMMIS_NOMINATIM_HEADERS, timeout=LMMIS_NOMINATIM_TIMEOUT)
    response.raise_for_status()  # fail with the HTTP status rather than a JSON parsing error
    # keep only matches classed as a place or a boundary
    matches = [r for r in response.json() if r['class'] in ['place', 'boundary']]
    if matches:
        _df = pd.DataFrame(
            [(place[0], place[1], place[2], r['lon'], r['lat'], r['osm_type'], r['class'], r['display_name'], r['type'], r['importance'], False) for r in matches],
            columns=LMMIS_NOMINATIM_COLUMNS)
        # flag the first match when it is the only one or strictly outranks the second on importance
        _df.at[0,'keep'] = True if _df.shape[0]==1 else _df.at[0,'importance'] > _df.at[1,'importance']
        results.append(_df)
    print([place[0], place[1], place[2]])
    time.sleep(1)  # Nominatim-required delay

if results:
    df = pd.concat(results)
    df.to_csv('Replication\\LMMIS Nominatim geocoding.csv', index=False, mode='w')
else:
    # pd.concat raises on an empty list, so report instead of crashing
    print('No Nominatim matches were found; no file was written.')
end


* re-geocode LMMIS place names through MapQuest, since Hjort says that's what they used

* Reload the LMMIS data and rebuild the list of distinct places: decode the
* region, zone and woreda codes into their value-label text, trim the TOWN
* marker from woreda names, and keep one row per distinct combination of
* Worcode and the variables matched by *name. Unlike the two earlier LMMIS
* sections, the numeric Worcode is retained here. The collapse serves only to
* de-duplicate: the mean of year it produces is dropped on the next line.
* The first replace removes the last 6 characters (the length of /TOWN/) and the
* second the last 7 (the length of /TOWN/-).
* NOTE(review): neither pattern is anchored to the end of the name, and the
* 6-character strip runs first, so a name ending in /TOWN/- would lose only its
* last 6 characters and then no longer match the second pattern. Confirm
* against the labels actually present in the data.
use "D:\OneDrive\Documents\Work\Library\Connectivity\Hjort and Poulsen 2019\data\lmmis", clear
decode Regcode, gen(Regname)
decode Zoncode, gen(Zonname)
decode Worcode, gen(Worname)
replace Worname = substr(Worname, 1, strlen(Worname)-6) if regexm(Worname,".+/TOWN/")
replace Worname = substr(Worname, 1, strlen(Worname)-7) if regexm(Worname,".+/TOWN/-")
collapse year, by(Worcode *name)
drop year

python
from sfi import Data; import os, requests, time; import pandas as pd

MAPQUEST_URL = 'https://www.mapquestapi.com/geocoding/v1/address'

# The MapQuest key is read from the environment rather than written into this
# file, so that it is not published along with the code.
mapquest_key = os.environ.get('MAPQUEST_API_KEY', '')
if not mapquest_key:
    raise RuntimeError('set the MAPQUEST_API_KEY environment variable before running this block')

places = Data.get('Worcode Regname Zonname Worname')
payload = {'key': mapquest_key, 'maxResults':'1'}
rows = []
for place in places:
    # only the woreda name is sent; the other fields are carried along for the output
    payload['location'] = place[3].lower() + ', Ethiopia'
    result = requests.get(MAPQUEST_URL, params=payload).json()
    # The response is a dict, so its length says nothing about whether a match
    # was found; look at the locations list of the first result instead.
    found = result.get('results') or [{}]
    locations = found[0].get('locations') or []
    if locations:
        ll = locations[0]['latLng']
        rows.append((place[0], place[1], place[2], place[3], ll['lng'], ll['lat']))
    print([place[1], place[2], place[3]])

# one row per place that returned a location; only the header is written when none did
df = pd.DataFrame(rows, columns=['Worcode', 'Regname', 'Zonname', 'Worname', 'long', 'lat'])
df.to_csv('Replication\\LMMIS MapQuest geocoding.csv', index=False, mode='w')
end


